Chapter 10 Joint Species Distribution Modelling - output analysis

rm(list=ls()) #clear environment
load("data/squirrels_data.Rdata")
options(contrasts = c('contr.sum','contr.poly'))

# Select desired support threshold
support_threshold=0.9
negsupport_threshold=1-support_threshold
# Load modelchains 
load("hmsc/fit_model.red_250_10.Rdata")
m.red <- m
cv.red <- cv

load("hmsc/fit_model.grey_250_10.Rdata")
m.grey <- m
cv.grey <- cv

rm(m,cv)

levels <- c("index500", "season", "logseqdepth", "Random: animal", "Random: sampling_site")

# Basal tree
hmsc_tree <- genome_tree %>%
        keep.tip(., tip=m.red$spNames) 

10.1 Variance partitioning

10.1.1 Red squirrel

# Compute variance partitioning
varpart.red=computeVariancePartitioning(m.red)

varpart.red$vals %>%
   as.data.frame() %>%
   rownames_to_column(var="variable") %>%
   pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
   mutate(variable=factor(variable, levels=levels)) %>%
   group_by(variable) %>%
   summarise(mean=mean(value)*100,sd=sd(value)*100) %>%
   tt()
tinytable_wukj8pe6prr40yi3tasi
variable mean sd
index500 4.153817 5.899410
season 17.565400 7.312821
logseqdepth 7.047628 5.859817
Random: animal 59.430963 18.634969
Random: sampling_site 11.802193 13.741861
preds = computePredictedValues(m.red) 
MF = evaluateModelFit(hM=m.red, predY=preds) 
hist(MF$R2, xlim = c(0,1), main=paste0("Mean = ", round(mean(MF$R2),2)))

# Varpart table
varpart_table.red <- varpart.red$vals %>%
   as.data.frame() %>%
   rownames_to_column(var="variable") %>%
   pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
   mutate(variable=factor(variable, levels=rev(levels))) %>%
   mutate(genome=factor(genome, levels=rev(hmsc_tree$tip.label)))

# Phylums
phylums <- genome_metadata %>%
    filter(genome %in% hmsc_tree$tip.label) %>%
    arrange(match(genome, hmsc_tree$tip.label)) %>%
    mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
    column_to_rownames(var = "genome") %>%
    select(phylum)

# Basal ggtree
varpart_tree <- hmsc_tree %>%
        force.ultrametric(.,method="extend") %>%
        ggtree(., size = 0.3)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
# Add phylum colors next to the tree tips
varpart_tree <- gheatmap(varpart_tree, phylums, offset=-0.2, width=0.1, colnames=FALSE) +
   scale_fill_manual(values=custom_colors)+
      labs(fill="Phylum")

#Reset fill scale to use a different colour profile in the heatmap
varpart_tree <- varpart_tree + new_scale_fill()

# Add variance stacked barplot
vertical_tree <-  varpart_tree +
        scale_fill_manual(values=c("#122f3d","#83bb90","#cccccc","#ed8a45","#f6de9c","#b2b530","#be3e2b","#12738f")) +
        geom_fruit(
             data=varpart_table.red,
             geom=geom_bar,
             mapping = aes(x=value, y=genome, fill=variable, group=variable),
             pwidth = 2,
             offset = 0.05,
             width= 1,
             orientation="y",
             stat="identity")+
      labs(fill="Variable")

vertical_tree

10.1.2 Grey squirrel

# Compute variance partitioning
varpart.grey=computeVariancePartitioning(m.grey)

varpart.grey$vals %>%
   as.data.frame() %>%
   rownames_to_column(var="variable") %>%
   pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
   mutate(variable=factor(variable, levels=levels)) %>%
   group_by(variable) %>%
   summarise(mean=mean(value)*100,sd=sd(value)*100) %>%
   tt()
tinytable_eemsk8ao1jjl36nyepg8
variable mean sd
index500 3.141512 3.309444
season 4.515606 5.616803
logseqdepth 33.570083 22.785619
Random: animal 46.637773 23.876555
Random: sampling_site 12.135027 14.569624
preds = computePredictedValues(m.grey) 
MF = evaluateModelFit(hM=m.grey, predY=preds) 
hist(MF$R2, xlim = c(0,1), main=paste0("Mean = ", round(mean(MF$R2),2)))

# Varpart table
varpart_table.grey <- varpart.grey$vals %>%
   as.data.frame() %>%
   rownames_to_column(var="variable") %>%
   pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
   mutate(variable=factor(variable, levels=rev(levels))) %>%
   mutate(genome=factor(genome, levels=rev(hmsc_tree$tip.label)))

# Phylums
phylums <- genome_metadata %>%
    filter(genome %in% hmsc_tree$tip.label) %>%
    arrange(match(genome, hmsc_tree$tip.label)) %>%
    mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
    column_to_rownames(var = "genome") %>%
    select(phylum)

# Basal ggtree
varpart_tree <- hmsc_tree %>%
        force.ultrametric(.,method="extend") %>%
        ggtree(., size = 0.3)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
# Add phylum colors next to the tree tips
varpart_tree <- gheatmap(varpart_tree, phylums, offset=-0.2, width=0.1, colnames=FALSE) +
   scale_fill_manual(values=custom_colors)+
      labs(fill="Phylum")

#Reset fill scale to use a different colour profile in the heatmap
varpart_tree <- varpart_tree + new_scale_fill()

# Add variance stacked barplot
vertical_tree <-  varpart_tree +
        scale_fill_manual(values=c("#122f3d","#83bb90","#cccccc","#ed8a45","#f6de9c","#b2b530","#be3e2b","#12738f")) +
        geom_fruit(
             data=varpart_table.grey,
             geom=geom_bar,
             mapping = aes(x=value, y=genome, fill=variable, group=variable),
             pwidth = 2,
             offset = 0.05,
             width= 1,
             orientation="y",
             stat="identity")+
      labs(fill="Variable")

vertical_tree

10.2 Model fit

MFCV.red <- evaluateModelFit(hM=m.red, predY=cv.red)
mean(MFCV.red$R2, na.rm = TRUE)
[1] 0.1413092
# genome_fit <- tibble(genome=m.red$spNames, r2 = MFCV.red[[2]])
  
# genome_counts %>%
# select(genome, any_of(red_samples)) %>%
# mutate_if(is.numeric, ~ . / sum(.)) %>%
# left_join(genome_fit, by="genome") %>%
# filter(r2>0.10) %>%
# select(-c(genome,r2)) %>%
# colSums() %>%
# hist()

var_pred_table.red <- tibble(mag=m.red$spNames,
       pred=MFCV.red$R2,
       var_pred=MFCV.red$R2 * varpart.red$vals[1,],
       support=getPostEstimate(hM=m.red, parName="Beta")$support %>% .[2,],
       estimate=getPostEstimate(hM=m.red, parName="Beta")$mean %>% .[2,]) 


MFCV.grey <- evaluateModelFit(hM=m.grey, predY=cv.grey)
mean(MFCV.grey$R2, na.rm = TRUE)
[1] 0.2293484
# genome_fit <- tibble(genome=m.red$spNames, r2 = MFCV.red[[2]])

# genome_counts %>%
# select(genome, any_of(grey_samples)) %>%
# mutate_if(is.numeric, ~ . / sum(.)) %>%
# left_join(var_pred_table.red, by=join_by("genome"=="mag")) %>%
# filter(var_pred>=0.005) %>%
# select(-c(genome,pred, var_pred, support, estimate)) %>%
# colSums() %>%
# hist()

var_pred_table.grey <- tibble(mag=m.grey$spNames,
       pred=MFCV.grey$R2,
       var_pred=MFCV.grey$R2 * varpart.grey$vals[1,],
       support=getPostEstimate(hM=m.grey, parName="Beta")$support %>% .[2,],
       estimate=getPostEstimate(hM=m.grey, parName="Beta")$mean %>% .[2,]) 

10.3 Predictive MAGs

pred_mags.red<- var_pred_table.red%>%
  filter(var_pred>=0.005) %>%
  pull(mag)

red_samples <- sample_metadata %>%
  filter(species == "Sciurus vulgaris") %>%
  dplyr::select(sample) %>%
  pull()

pred.ab.red <- genome_counts %>% 
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS normalisation
  filter(genome %in% m.red$spNames) %>%
   rowwise() %>% #compute for each row (genome)
   mutate(
    mean = mean(c_across(all_of(red_samples)), na.rm = TRUE),  # Get mean genome counts across red samples
    prev = sum(c_across(all_of(red_samples)) > 0, na.rm = TRUE) / sum(!is.na(c_across(all_of(red_samples)))), # Proportion of non-zero values
   subset = ifelse(genome %in% pred_mags.red, 'pred', 'nonpred')) %>%
   dplyr::select(genome, subset, mean, prev) %>%
   left_join(genome_metadata, by=join_by(genome==genome)) %>%
   arrange(subset,-mean)

pred.ab.red %>%
  ggplot(., aes(x=mean, y=genome, color=subset)) +
  geom_point(size=3) +
  theme_bw()+
    theme(axis.text.y = element_blank())

pred.ab.red %>%
  ggplot(., aes(x=prev, y=genome, color=subset)) +
  geom_point(size=3) +
  theme_bw()+
    theme(axis.text.y = element_blank())

pred_mags.grey<- var_pred_table.grey%>%
  filter(var_pred>=0.005) %>%
  pull(mag)

grey_samples <- sample_metadata %>%
  filter(species=="Sciurus carolinensis") %>%
  dplyr::select(sample) %>% pull()#

pred.ab.grey <- genome_counts %>% 
  #mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS normalisation
  filter(genome %in% m.red$spNames) %>%
   rowwise() %>% #compute for each row (genome)
   mutate(
    mean = mean(c_across(all_of(grey_samples)), na.rm = TRUE),  # Get mean genome counts across red samples
    prev = sum(c_across(all_of(grey_samples)) > 0, na.rm = TRUE) / sum(!is.na(c_across(all_of(grey_samples)))), # Proportion of non-zero values
   subset = ifelse(genome %in% pred_mags.red, 'pred', 'nonpred')) %>%
   dplyr::select(genome, subset, mean, prev) %>%
   left_join(genome_metadata, by=join_by(genome==genome)) %>%
   arrange(subset,-mean)

pred.ab.grey %>%
  ggplot(., aes(x=mean, y=genome, color=subset)) +
  geom_point(size=3) +
  theme_bw()+
    theme(axis.text.y = element_blank())

pred.ab.grey %>%
  ggplot(., aes(x=prev, y=genome, color=subset)) +
  geom_point(size=3) +
  theme_bw()+
    theme(axis.text.y = element_blank())

10.4 Posterior estimates

10.4.1 Red squirrel

# Posterior estimates 

post_estimates_red <- getPostEstimate(hM=m.red, parName="Beta")$support %>%
    as.data.frame() %>%
    mutate(variable=m.red$covNames) %>%
    pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
    mutate(trend = case_when(
          value >= support_threshold ~ "Positive",
          value <= negsupport_threshold ~ "Negative",
          TRUE ~ "Neutral")) 

post_table_red <- post_estimates_red %>%
    mutate(genome=factor(genome, levels=rev(hmsc_tree$tip.label))) %>%
    select(-trend) %>%
    mutate(value = case_when(
          value >= support_threshold ~ "Positive",
          value <= negsupport_threshold ~ "Negative",
          TRUE ~ "Neutral")) %>%
    mutate(value=factor(value, levels=c("Positive","Neutral","Negative"))) %>%
    pivot_wider(names_from = variable, values_from = value) %>%
    rename(intercept=2,
            index500=3,
            autumn=4,
            winter=5,
            logseqdepth=6) %>%
   column_to_rownames(var="genome") %>%
  select(-intercept) 
  
# Basal ggtree
postestimates_tree <- hmsc_tree %>%
        force.ultrametric(.,method="extend") %>%
        ggtree(., size = 0.3)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
#Add phylum colors next to the tree tips
postestimates_tree <- gheatmap(postestimates_tree, phylums, offset=-0.2, width=0.1, colnames=FALSE) +
      scale_fill_manual(values=custom_colors)+
      labs(fill="Phylum")

#Reset fill scale to use a different colour profile in the heatmap
postestimates_tree <- postestimates_tree + new_scale_fill()

# Add posterior significant heatmap

postestimates_tree <- gheatmap(postestimates_tree, post_table_red, offset=0, width=0.5, colnames=TRUE, colnames_position="top",colnames_angle=90, colnames_offset_y=1, hjust=0) +
        scale_fill_manual(values=c("#be3e2b","#f4f4f4","#b2b530"))+
        labs(fill="Trend") 

postestimates_tree +
        vexpand(.30, 1)  # expand top 

10.4.2 Grey squirrel

# Posterior estimates 

post_estimates_grey <- getPostEstimate(hM=m.grey, parName="Beta")$support %>%
    as.data.frame() %>%
    mutate(variable=m.grey$covNames) %>%
    pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
    mutate(trend = case_when(
          value >= support_threshold ~ "Positive",
          value <= negsupport_threshold ~ "Negative",
          TRUE ~ "Neutral")) 

post_table_grey <- post_estimates_grey %>%
    mutate(genome=factor(genome, levels=rev(hmsc_tree$tip.label))) %>%
    select(-trend) %>%
    mutate(value = case_when(
          value >= support_threshold ~ "Positive",
          value <= negsupport_threshold ~ "Negative",
          TRUE ~ "Neutral")) %>%
    mutate(value=factor(value, levels=c("Positive","Neutral","Negative"))) %>%
    pivot_wider(names_from = variable, values_from = value) %>%
    rename(intercept=2,
            index500=3,
            autumn=4,
            winter=5,
            logseqdepth=6) %>%
   column_to_rownames(var="genome") %>%
  select(-intercept) 
  
# Basal ggtree
postestimates_tree <- hmsc_tree %>%
        force.ultrametric(.,method="extend") %>%
        ggtree(., size = 0.3)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
#Add phylum colors next to the tree tips
postestimates_tree <- gheatmap(postestimates_tree, phylums, offset=-0.2, width=0.1, colnames=FALSE) +
      scale_fill_manual(values=custom_colors)+
      labs(fill="Phylum")

#Reset fill scale to use a different colour profile in the heatmap
postestimates_tree <- postestimates_tree + new_scale_fill()

# Add posterior significant heatmap

postestimates_tree <- gheatmap(postestimates_tree, post_table_grey, offset=0, width=0.5, colnames=TRUE, colnames_position="top",colnames_angle=90, colnames_offset_y=1, hjust=0) +
        scale_fill_manual(values=c("#be3e2b","#f4f4f4","#b2b530"))+
        labs(fill="Trend") 

postestimates_tree +
        vexpand(.30, 1)  # expand top 

10.5 Correlations among bacteria

10.5.1 Red squirrel

#Compute the residual correlation matrix
OmegaCor = computeAssociations(m.red)

#Co-occurrence matrix at the animal level
supportLevel = 0.95
toPlot = ((OmegaCor[[1]]$support>supportLevel)
          + (OmegaCor[[1]]$support<(1-supportLevel))>0)*OmegaCor[[1]]$mean

matrix <- toPlot %>% 
      as.data.frame() %>%
      rownames_to_column(var="genome1") %>%
      pivot_longer(!genome1, names_to = "genome2", values_to = "cor") %>%
      mutate(genome1= factor(genome1, levels=rev(hmsc_tree$tip.label))) %>%
      mutate(genome2= factor(genome2, levels=hmsc_tree$tip.label)) %>%
      ggplot(aes(x = genome1, y = genome2, fill = cor)) +
            geom_tile() + 
            scale_fill_gradient2(low = "#be3e2b",
                       mid = "#f4f4f4",
                       high = "#b2b530")+
            theme_void() +
            theme(legend.position = "none") 

corr.legend <- get_legend(matrix, position="right") 
corr.legend <- as_ggplot(corr.legend)

vtree <- hmsc_tree %>%
  force.ultrametric(.,method="extend") %>%
  ggtree(., expand=1.5) +
  hexpand(0.5) 
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
#Add phylum colors next to the tree tips
vtree <- gheatmap(vtree, phylums, offset=-0.1, width=0.6, colnames=FALSE) +
      scale_fill_manual(values=custom_colors) +
      theme(legend.position = 'none') + scale_y_reverse()

vtreeD <- hmsc_tree %>%
  force.ultrametric(.,method="extend") %>%
  ggtree(., expand=1.5, layout="dendrogram") 
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
#Add phylum colors next to the tree tips
vtreeD <- gheatmap(vtreeD, phylums, offset=-0.1, width=0.6, colnames=FALSE) +
      scale_fill_manual(values=custom_colors) +
      theme(legend.position = 'none') 

# properly align trees to corr matrix with package aplot
matrix %>%  insert_left(vtree, width=.25) %>% insert_top(vtreeD, height=.3) %>% insert_right(corr.legend, width=0.15)

10.5.2 Grey squirrel

#Compute the residual correlation matrix
OmegaCor = computeAssociations(m.grey)

#Co-occurrence matrix at the animal level
supportLevel = 0.95
toPlot = ((OmegaCor[[1]]$support>supportLevel)
          + (OmegaCor[[1]]$support<(1-supportLevel))>0)*OmegaCor[[1]]$mean

matrix <- toPlot %>% 
      as.data.frame() %>%
      rownames_to_column(var="genome1") %>%
      pivot_longer(!genome1, names_to = "genome2", values_to = "cor") %>%
      mutate(genome1= factor(genome1, levels=rev(hmsc_tree$tip.label))) %>%
      mutate(genome2= factor(genome2, levels=hmsc_tree$tip.label)) %>%
      ggplot(aes(x = genome1, y = genome2, fill = cor)) +
            geom_tile() + 
            scale_fill_gradient2(low = "#be3e2b",
                       mid = "#f4f4f4",
                       high = "#b2b530")+
            theme_void() +
            theme(legend.position = "none") 

corr.legend <- get_legend(matrix, position="right") 
corr.legend <- as_ggplot(corr.legend)

vtree <- hmsc_tree %>%
  force.ultrametric(.,method="extend") %>%
  ggtree(., expand=1.5) +
  hexpand(0.5) 
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
#Add phylum colors next to the tree tips
vtree <- gheatmap(vtree, phylums, offset=-0.1, width=0.6, colnames=FALSE) +
      scale_fill_manual(values=custom_colors) +
      theme(legend.position = 'none') + scale_y_reverse()

vtreeD <- hmsc_tree %>%
  force.ultrametric(.,method="extend") %>%
  ggtree(., expand=1.5, layout="dendrogram") 
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
#Add phylum colors next to the tree tips
vtreeD <- gheatmap(vtreeD, phylums, offset=-0.1, width=0.6, colnames=FALSE) +
      scale_fill_manual(values=custom_colors) +
      theme(legend.position = 'none') 

# properly align trees to corr matrix with package aplot
matrix %>%  insert_left(vtree, width=.25) %>% insert_top(vtreeD, height=.3) %>% insert_right(corr.legend, width=0.15)

10.6 Predicted composition by urbanization

10.6.1 Red squirrel

gradient = seq(0,1, by=0.1)
gradientlength = length(gradient)

pred_urban_red <- constructGradient(m.red,
                      focalVariable = "index500",
                      non.focalVariables = list(logseqdepth=list(1),species=list(1), season=list(1)),
                      ngrid=gradientlength) %>%
                      predict(m.red, Gradient = ., expected = TRUE) %>%
                      do.call(rbind,.) %>%
                      as.data.frame() %>%
                      mutate(index500=rep(gradient,1000)) %>%
                      pivot_longer(-c(index500), names_to = "genome", values_to = "value")


post_estimates_red %>%
    filter(variable=="index500", 
           genome %in% pred_mags.red) %>% #keep only predictive mags
    select(genome,trend) %>%
    left_join(pred_urban_red, by=join_by(genome==genome)) %>%
    group_by(genome, trend, index500) %>%
    summarize(value = mean(value, na.rm = TRUE)) %>%
    left_join(genome_metadata, by=join_by(genome == genome)) %>%
    ggplot(aes(x=index500, y=value, group=genome, color=phylum, linetype=trend)) +
        geom_line() +
        scale_linetype_manual(values=c("solid","dashed","solid")) +
        scale_color_manual(values=custom_colors) +
        facet_grid(fct_rev(trend) ~ phylum) +
        labs(y="Genome abundance (log)",x="Urbanization index") +
        theme(legend.position = "none") +
        theme_minimal() +
         theme(legend.position = "none",
               axis.text.x = element_text(angle = 45, hjust = 0.8,),
               axis.line.x = element_line(size = 0.5, linetype = "solid", colour = "black"),
               strip.text.x = element_text(angle = 90, hjust = 0,),
               strip.text.y = element_text(face="bold"),
)

10.6.2 Grey squirrel

gradient = seq(0,1, by=0.1)
gradientlength = length(gradient)

pred_urban_grey <- constructGradient(m.grey,
                      focalVariable = "index500",
                      non.focalVariables = list(logseqdepth=list(1),species=list(1), season=list(1)),
                      ngrid=gradientlength) %>%
                      predict(m.grey, Gradient = ., expected = TRUE) %>%
                      do.call(rbind,.) %>%
                      as.data.frame() %>%
                      mutate(index500=rep(gradient,1000)) %>%
                      pivot_longer(-c(index500), names_to = "genome", values_to = "value")

post_estimates_grey %>%
    filter(variable=="index500",
           genome %in% pred_mags.grey) %>% #keep only mags predictive mags
    select(genome,trend) %>%
    left_join(pred_urban_grey, by=join_by(genome==genome)) %>%
    group_by(genome, trend, index500) %>%
    summarize(value = mean(value, na.rm = TRUE)) %>%
    left_join(genome_metadata, by=join_by(genome == genome)) %>%
    ggplot(aes(x=index500, y=value, group=genome, color=phylum, linetype=trend)) +
        geom_line() +
        scale_linetype_manual(values=c("solid","dashed","solid")) +
        scale_color_manual(values=custom_colors) +
        facet_grid(fct_rev(trend) ~ phylum) +
        labs(y="Genome abundance (log)",x="Urbanization index") +
        theme(legend.position = "none") +
        theme_minimal() +
         theme(legend.position = "none",
               axis.text.x = element_text(angle = 45, hjust = 0.8,),
               axis.line.x = element_line(size = 0.5, linetype = "solid", colour = "black"),
               strip.text.x = element_text(angle = 90, hjust = 0,),
               strip.text.y = element_text(face="bold"),
)

10.7 Predicted composition by season

10.7.1 Red squirrel

aut_enrichment_red <- post_estimates_red %>%
    filter(variable=="seasonautumn") %>%
    select(genome, trend)
win_enrichment_red <- post_estimates_red %>%
    filter(variable=="seasonwinter") %>%
    select(genome, trend)

# m$covNames
gradient = c("spring-summer","autumn", "winter")
gradientlength = length(gradient)

pred_season_red <- constructGradient(m.red, 
                      focalVariable = "season", 
                      non.focalVariables = 1,
                      ngrid=gradientlength) %>%
            predict(m.red, Gradient = ., expected = TRUE) %>%
            do.call(rbind,.) %>%
            as.data.frame() %>%
            mutate(season=rep(gradient,1000)) %>%
            pivot_longer(!season,names_to = "genome", values_to = "value")
genome_metadata_clean <- genome_metadata %>%  
    mutate(family = coalesce(family, paste('Unclassified', order)),
           genus = coalesce(genus, 
                              if_else(grepl('^Unclassified', family),
                                      family, paste('Unclassified', family))))

pred_season_red %>%
  filter(genome %in% pred_mags.red) %>% #keep only predictive mags
  left_join(aut_enrichment_red,by="genome") %>%
      filter(trend != "Neutral") %>%
      pivot_wider(names_from = season, values_from = value) %>%
      unnest(c(`autumn`, `spring-summer`)) %>%
      mutate(diff_sa = `spring-summer` - `autumn`) %>%
      select(genome, diff_sa) %>%
      left_join(genome_metadata_clean, by="genome") %>%
      mutate(genome= factor(genome, levels=genome_tree$tip.label)) %>%
      ggplot(., aes(y=forcats::fct_reorder(genus,diff_sa), x=diff_sa, fill=phylum, color=phylum)) +
            geom_boxplot(outlier.shape = NA) +
            scale_color_manual(values=custom_colors)+
            scale_fill_manual(values=alpha(custom_colors,0.4))+
            geom_vline(xintercept = 0)+
            annotate('text', x=-3, y=16, label = "Enriched in\nautumn", color='black') +
            annotate('text', x=3, y=16, label = "Enriched in\nspring-summer", color='black') +
            theme_classic()+
            labs(x = "Difference in log-abundance", y = "Genera") +
            xlim(-5,5)

pred_season_red %>%
    filter(genome %in% pred_mags.red) %>% #keep only predictive mags
    left_join(win_enrichment_red,by="genome") %>%
      filter(trend != "Neutral") %>%
      pivot_wider(names_from = season, values_from = value) %>%
      unnest(c(`winter`, `spring-summer`)) %>%
      mutate(diff_sw = `spring-summer` - `winter`) %>%
      select(genome, diff_sw) %>%
      left_join(genome_metadata_clean, by="genome") %>%
      mutate(genome= factor(genome, levels=genome_tree$tip.label)) %>%
      ggplot(., aes(y=forcats::fct_reorder(genus,diff_sw), x=diff_sw, fill=phylum, color=phylum)) +
            geom_boxplot(outlier.shape = NA) +
            scale_color_manual(values=custom_colors)+
            scale_fill_manual(values=alpha(custom_colors,0.4))+
            annotate('text', x=-10, y=16, label = "Enriched in\nwinter", color='black') +
            annotate('text', x=10, y=16, label = "Enriched in\nspring-summer", color='black') +
            geom_vline(xintercept = 0)+
            theme_classic()+
            labs(x = "Difference in log-abundance", y = "Genera") +
            xlim(-18,18)

10.7.2 Grey squirrel

aut_enrichment_grey <- post_estimates_grey %>%
    filter(variable=="seasonautumn") %>%
    select(genome, trend)
win_enrichment_grey <- post_estimates_grey %>%
    filter(variable=="seasonwinter") %>%
    select(genome, trend)

# m$covNames
pred_season_grey <- constructGradient(m.grey, 
                      focalVariable = "season", 
                      non.focalVariables = 1,
                      ngrid=gradientlength) %>%
            predict(m.grey, Gradient = ., expected = TRUE) %>%
            do.call(rbind,.) %>%
            as.data.frame() %>%
            mutate(season=rep(gradient,1000)) %>%
            pivot_longer(!season,names_to = "genome", values_to = "value")
pred_season_grey %>%
  filter(genome %in% pred_mags.grey) %>% #keep only predictive mags
  left_join(aut_enrichment_grey,by="genome") %>%
      filter(trend != "Neutral") %>%
      pivot_wider(names_from = season, values_from = value) %>%
      unnest(c(`autumn`, `spring-summer`)) %>%
      mutate(diff_sa = `spring-summer` - `autumn`) %>%
      select(genome, diff_sa) %>%
      left_join(genome_metadata_clean, by="genome") %>%
      mutate(genome= factor(genome, levels=genome_tree$tip.label)) %>%
      ggplot(., aes(y=forcats::fct_reorder(genus,diff_sa), x=diff_sa, fill=phylum, color=phylum)) +
            geom_boxplot(outlier.shape = NA) +
            scale_color_manual(values=custom_colors)+
            scale_fill_manual(values=alpha(custom_colors,0.4))+
            geom_vline(xintercept = 0)+
            annotate('text', x=-2.5, y=14, label = "Enriched in\nautumn", color='black') +
            annotate('text', x=2.5, y=14, label = "Enriched in\nspring-summer", color='black') +
            theme_classic()+
            labs(x = "Difference in log-abundance", y = "Genera") +
            xlim(-5,5)

pred_season_grey %>%
  filter(genome %in% pred_mags.grey) %>% #keep only predictive mags
  left_join(aut_enrichment_grey,by="genome") %>%
      filter(trend != "Neutral") %>%
      pivot_wider(names_from = season, values_from = value) %>%
      unnest(c(`winter`, `spring-summer`)) %>%
      mutate(diff_sw = `spring-summer` - `winter`) %>%
      select(genome, diff_sw) %>%
      left_join(genome_metadata_clean, by="genome") %>%
      mutate(genome= factor(genome, levels=genome_tree$tip.label)) %>%
      ggplot(., aes(y=forcats::fct_reorder(genus,diff_sw), x=diff_sw, fill=phylum, color=phylum)) +
            geom_boxplot(outlier.shape = NA) +
            scale_color_manual(values=custom_colors) +
            scale_fill_manual(values=alpha(custom_colors,0.4)) +
            geom_vline(xintercept = 0) +
            annotate('text', x=-5, y=14, label = "Enriched in\nwinter", color='black') +
            annotate('text', x=5, y=14, label = "Enriched in\nspring-summer", color='black') +
            theme_classic()+
            labs(x = "Difference in log-abundance", y = "Genera") +
            xlim(-10,10)

10.8 Microbiome functional predictions

tss <- function(abund){sweep(abund, 2, colSums(abund), FUN="/")} 

genome_counts <- genome_counts %>%
  column_to_rownames(var="genome")

#Get list of present MAGs
present_MAGs <- genome_counts %>%
  filter(rowSums(.[, -1]) != 0) %>%
  rownames()

#Align distillr annotations with present MAGs and remove all-zero and all-one traits
present_MAGs <- present_MAGs[present_MAGs %in% rownames(genome_gifts)]
genome_gifts_filt <- genome_gifts[present_MAGs,] %>%
  select_if(~!all(. == 0)) %>%  #remove all-zero modules
  select_if(~!all(. == 1)) #remove all-one modules

GIFTs_elements <- to.elements(genome_gifts_filt, GIFT_db)

#Aggregate element-level GIFTs into the function level
GIFTs_functions <- to.functions(GIFTs_elements,GIFT_db)

#Aggregate function-level GIFTs into overall Biosynthesis, Degradation and Structural GIFTs and get overall metabolic capacity indices per MAG (at the domain level)
GIFTs_domains <- to.domains(GIFTs_functions,GIFT_db) %>% as.data.frame() %>%
  mutate(Overall=rowMeans(select(.,Biosynthesis,Structure,Degradation), na.rm=TRUE))

10.8.1 Function-level predictions by urbanisation

10.8.1.1 Red squirrel

community_func_urb_red <- pred_urban_red %>%
  filter(genome %in% pred_mags.red) %>% #keep only predictive mags
  group_by(index500, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "index500") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(GIFTs_functions,.,GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="index500")
   })


calculate_slope <- function(x) {
  lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
  coef(lm_fit)[2]
}

function_pred_urb_red <- map_dfc(community_func_urb_red, function(mat) {
      mat %>%
        column_to_rownames(var = "index500") %>%
        t() %>%
        as.data.frame() %>%
        rowwise() %>%
        mutate(slope = calculate_slope(c_across(everything()))) %>%
        select(slope) }) %>%
      t() %>%
      as.data.frame() %>%
      set_names(colnames(community_func_urb_red[[1]])[-1]) %>%
      rownames_to_column(var="iteration") %>%
      pivot_longer(!iteration, names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)

# Positively associated with urbanisation
function_pred_urb_red %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) %>%
  paged_table()

#Negatively associated with urbanisation
function_pred_urb_red %>%
  filter(mean <0) %>%
  arrange(-negative_support) %>%
  filter(negative_support>=0.9) %>%
  paged_table()
#Positively associated
positive_red <- function_pred_urb_red%>%
  filter(mean >0) %>%
  arrange(mean) %>%
  filter(positive_support>=0.9) %>%
  select(-negative_support) %>%
  rename(support=positive_support)

#Negatively associated
negative_red <- function_pred_urb_red %>%
  filter(mean <0) %>%
  arrange(mean) %>%
  filter(negative_support>=0.9) %>%
  select(-positive_support) %>%
  rename(support=negative_support)

all_functions_red <- #bind_rows(positive_red,negative_red)  %>%
  function_pred_urb_red %>%
  left_join(GIFT_db,by=join_by(trait==Code_function)) %>%
  mutate(trait=factor(trait)) %>%
  mutate(function_legend=str_c(trait," - ",Function)) %>%
  select(trait,mean,p1,p9,function_legend) %>% 
  unique()

gift_colors <- read_tsv("data/gift_colors.tsv") 

all_functions_red %>%
  ggplot(aes(x=mean, y=fct_reorder(function_legend, mean), xmin=p1, xmax=p9, color=function_legend)) +
      geom_point() +
      geom_errorbar() +
      #xlim(c(-0.050,0.050)) +
      geom_vline(xintercept=0) +
      scale_color_manual(values = gift_colors$Color) +
      theme_minimal() +
      labs(x="Regression coefficient",y="Functional trait") +
      #guides(col = guide_legend(ncol = 1))
      theme(legend.position = "none")

community_func_urb_red %>%
    bind_rows() %>%
    pivot_longer(-index500, names_to = "trait", values_to = "value") %>%
    filter(trait %in% c(positive_red$trait, negative_red$trait)) %>%
    mutate(trait=factor(trait, levels=c(positive_red$trait, negative_red$trait))) %>%
    mutate(index500=as.numeric(index500)) %>%
    ggplot(aes(x=index500, y=value)) +
          geom_smooth(method = lm, formula = y ~ x, se = TRUE) +
          #geom_smooth(method = lm, formula = y ~ splines::bs(x, 3), se = TRUE) +
          facet_wrap(~trait, ncol=5, scales="free") +
          theme_minimal() +
          labs(x="Urbanization",y="Metabolic Capacity Index") 

10.8.1.2 Grey squirrel

community_func_urb_grey  <- pred_urban_grey %>%
  filter(genome %in% pred_mags.grey) %>% #keep only predictive mags
  group_by(index500, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "index500") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(GIFTs_functions,.,GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="index500")
   })


calculate_slope <- function(x) {
  lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
  coef(lm_fit)[2]
}

function_pred_urb_grey <- map_dfc(community_func_urb_grey, function(mat) {
      mat %>%
        column_to_rownames(var = "index500") %>%
        t() %>%
        as.data.frame() %>%
        rowwise() %>%
        mutate(slope = calculate_slope(c_across(everything()))) %>%
        select(slope) }) %>%
      t() %>%
      as.data.frame() %>%
      set_names(colnames(community_func_urb_grey[[1]])[-1]) %>%
      rownames_to_column(var="iteration") %>%
      pivot_longer(!iteration, names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)

# Positively associated with urbanisation
function_pred_urb_grey %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) %>%
  paged_table()

#Negatively associated with urbanisation
function_pred_urb_grey %>%
  filter(mean <0) %>%
  arrange(-negative_support) %>%
  filter(negative_support>=0.9) %>%
  paged_table()
#Positively associated
positive_grey <- function_pred_urb_grey %>%
  filter(mean >0) %>%
  arrange(mean) %>%
  filter(positive_support>=0.9) %>%
  select(-negative_support) %>%
  rename(support=positive_support)

#Negatively associated
negative_grey <- function_pred_urb_grey %>%
  filter(mean <0) %>%
  arrange(mean) %>%
  filter(negative_support>=0.9) %>%
  select(-positive_support) %>%
  rename(support=negative_support)

all_functions_grey <- #bind_rows(positive_grey,negative_grey) %>%
  function_pred_urb_grey %>%
  left_join(GIFT_db,by=join_by(trait==Code_function)) %>%
  mutate(trait=factor(trait)) %>%
  mutate(function_legend=str_c(trait," - ",Function)) %>%
  select(trait,mean,p1,p9,function_legend) %>% 
  unique()

all_functions_grey %>%
  ggplot(aes(x=mean, y=fct_reorder(function_legend, mean), xmin=p1, xmax=p9, color=function_legend)) +
      geom_point() +
      geom_errorbar() +
      #xlim(c(-0.050,0.050)) +
      geom_vline(xintercept=0) +
      scale_color_manual(values = gift_colors$Color) +
      theme_minimal() +
      labs(x="Regression coefficient",y="Functional trait") +
      #guides(col = guide_legend(ncol = 1))
      theme(legend.position = "none")

community_func_urb_grey %>%
    bind_rows() %>%
    pivot_longer(-index500, names_to = "trait", values_to = "value") %>%
    filter(trait %in% c(positive_grey$trait, negative_grey$trait)) %>%
    mutate(trait=factor(trait, levels=c(positive_grey$trait, negative_grey$trait))) %>%
    mutate(index500=as.numeric(index500)) %>%
    ggplot(aes(x=index500, y=value)) +
          geom_smooth(method = lm, formula = y ~ x, se = TRUE) +
          #geom_smooth(method = lm, formula = y ~ splines::bs(x, 3), se = TRUE) +
          facet_wrap(~trait, ncol=5, scales="free") +
          theme_minimal() +
          labs(x="Urbanization",y="Metabolic Capacity Index") 

10.8.2 Function-level predictions by season

10.8.2.1 Red squirrel

community_func_seas_red <- pred_season_red  %>%
  filter(genome %in% pred_mags.red) %>% #keep only predictive mags
  group_by(season, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "season") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(GIFTs_functions,.,GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="season")
   })

function_pred_seas_red <- map_dfr(community_func_seas_red, function(df) {
  spring_values <- df %>% filter(season == "spring-summer") %>% select(-season)
  winter_values <- df %>% filter(season == "winter") %>% select(-season)
  spring_values - winter_values
}) %>%
  mutate(iteration=c(1:1000)) %>% 
  pivot_longer(!iteration,names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)

# Spring-summer associated
function_pred_seas_red %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) %>%
  paged_table()

# Winter associated
function_pred_seas_red %>%
  filter(mean <0) %>%
  arrange(-negative_support) %>%
  filter(negative_support>=0.9) %>%
  paged_table()
#Spring-summer associated
positive_red <- function_pred_seas_red%>%
  filter(mean >0) %>%
  arrange(mean) %>%
  filter(positive_support>=0.9) %>%
  select(-negative_support) %>%
  rename(support=positive_support)

#Winter associated
negative_red <- function_pred_seas_red %>%
  filter(mean <0) %>%
  arrange(mean) %>%
  filter(negative_support>=0.9) %>%
  select(-positive_support) %>%
  rename(support=negative_support)

all_functions_red <- function_pred_seas_red %>%
  left_join(GIFT_db,by=join_by(trait==Code_function)) %>%
  mutate(trait=factor(trait)) %>%
  mutate(function_legend=str_c(trait," - ",Function)) %>%
  select(trait,mean,p1,p9,function_legend) %>% 
  unique()

all_functions_red %>%
  ggplot(aes(x=mean, y=fct_reorder(function_legend, mean), xmin=p1, xmax=p9, color=function_legend)) +
      geom_point() +
      geom_errorbar() +
      #xlim(c(-0.050,0.050)) +
      geom_vline(xintercept=0) +
      scale_color_manual(values = gift_colors$Color) +
      theme_minimal() +
      labs(x="Difference in log-abundance",y="Functional trait") +
      annotate('text', x=-0.75, y=12, label = "Enriched in\nwinter", color='black') +
      annotate('text', x=0.75, y=12, label = "Enriched in\nspring-summer", color='black') +
      #guides(col = guide_legend(ncol = 1))
      theme(legend.position = "none")

10.8.2.2 Grey squirrel

community_func_seas_grey <- pred_season_grey  %>%
  filter(genome %in% pred_mags.grey) %>% #keep only predictive mags
  group_by(season, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "season") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(GIFTs_functions,.,GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="season")
   })

function_pred_seas_grey <- map_dfr(community_func_seas_grey, function(df) {
  spring_values <- df %>% filter(season == "spring-summer") %>% select(-season)
  winter_values <- df %>% filter(season == "winter") %>% select(-season)
  spring_values - winter_values
}) %>%
  mutate(iteration=c(1:1000)) %>% 
  pivot_longer(!iteration,names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)

# Spring-summer associated
function_pred_seas_grey %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) %>%
  paged_table()

# Winter associated
function_pred_seas_grey %>%
  filter(mean <0) %>%
  arrange(-negative_support) %>%
  filter(negative_support>=0.9) %>%
  paged_table()
#Spring-summer associated
positive_grey <- function_pred_seas_grey%>%
  filter(mean >0) %>%
  arrange(mean) %>%
  filter(positive_support>=0.9) %>%
  select(-negative_support) %>%
  rename(support=positive_support)

#Winter associated
negative_grey <- function_pred_seas_grey %>%
  filter(mean <0) %>%
  arrange(mean) %>%
  filter(negative_support>=0.9) %>%
  select(-positive_support) %>%
  rename(support=negative_support)

all_functions_grey <- function_pred_seas_grey %>%
  left_join(GIFT_db,by=join_by(trait==Code_function)) %>%
  mutate(trait=factor(trait)) %>%
  mutate(function_legend=str_c(trait," - ",Function)) %>%
  select(trait,mean,p1,p9,function_legend) %>% 
  unique()

all_functions_grey %>%
  ggplot(aes(x=mean, y=fct_reorder(function_legend, mean), xmin=p1, xmax=p9, color=function_legend)) +
      geom_point() +
      geom_errorbar() +
      #xlim(c(-0.050,0.050)) +
      geom_vline(xintercept=0) +
      scale_color_manual(values = gift_colors$Color) +
      theme_minimal() +
      labs(x="Difference in log-abundance",y="Functional trait") +
      annotate('text', x=-1, y=12, label = "Enriched in\nwinter", color='black') +
      annotate('text', x=1, y=12, label = "Enriched in\nspring-summer", color='black') +
      #guides(col = guide_legend(ncol = 1))
      theme(legend.position = "none")

10.8.3 Element-level predictions by urbanisation

10.8.3.1 Red squirrel

community_elem_urb_red <-  pred_urban_red %>%
  filter(genome %in% pred_mags.red) %>% #keep only predictive mags
  group_by(index500, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "index500") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(GIFTs_elements,.,GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="index500")
   })


calculate_slope <- function(x) {
  lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
  coef(lm_fit)[2]
}

element_pred_urb_red <- map_dfc(community_elem_urb_red, function(mat) {
      mat %>%
        column_to_rownames(var = "index500") %>%
        t() %>%
        as.data.frame() %>%
        rowwise() %>%
        mutate(slope = calculate_slope(c_across(everything()))) %>%
        select(slope) }) %>%
      t() %>%
      as.data.frame() %>%
      set_names(colnames(community_elem_urb_red[[1]])[-1]) %>%
      rownames_to_column(var="iteration") %>%
      pivot_longer(!iteration, names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)

# Positively associated with urbanisation
element_pred_urb_red %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) %>%
  paged_table()

#Negatively associated with urbanisation
element_pred_urb_red %>%
  filter(mean <0) %>%
  arrange(-negative_support) %>%
  filter(negative_support>=0.9) %>%
  paged_table()
#Positively associated
positive_red <- element_pred_urb_red%>%
  filter(mean >0) %>%
  arrange(mean) %>%
  filter(positive_support>=0.9) %>%
  select(-negative_support) %>%
  rename(support=positive_support)

#Negatively associated
negative_red <- element_pred_urb_red %>%
  filter(mean <0) %>%
  arrange(mean) %>%
  filter(negative_support>=0.9) %>%
  select(-positive_support) %>%
  rename(support=negative_support)

all_elements_red <- bind_rows(positive_red,negative_red) %>%
  left_join(GIFT_db,by=join_by(trait==Code_element)) %>%
  mutate(trait=factor(trait,levels=c(rev(positive_red$trait),rev(negative_red$trait)))) %>%
  mutate(Code_function=factor(Code_function)) %>%
  mutate(element_legend=str_c(trait," - ",Element)) %>%
  mutate(function_legend=str_c(Code_function," - ",Function)) %>%
  select(trait,mean,p1,p9,element_legend,function_legend) %>% 
  unique()

all_elements_red %>%
  ggplot(aes(x=mean, y=fct_reorder(element_legend, mean), xmin=p1, xmax=p9, color=function_legend)) +
      geom_point() +
      geom_errorbar() +
      #xlim(c(-0.050,0.050)) +
      geom_vline(xintercept=0) +
      scale_color_manual(values = gift_colors$Color) +
      theme_minimal() +
      labs(x="Regression coefficient",y="Functional trait") +
      #guides(col = guide_legend(ncol = 1))
      theme(legend.position = "right")

community_elem_urb_red %>%
    bind_rows() %>%
    pivot_longer(-index500, names_to = "trait", values_to = "value") %>%
    filter(trait %in% c(positive_red$trait, negative_red$trait)) %>%
    mutate(trait=factor(trait, levels=c(positive_red$trait, negative_red$trait))) %>%
    mutate(index500=as.numeric(index500)) %>%
    ggplot(aes(x=index500, y=value)) +
          geom_smooth(method = lm, formula = y ~ x, se = TRUE) +
          #geom_smooth(method = lm, formula = y ~ splines::bs(x, 3), se = TRUE) +
          facet_wrap(~trait, ncol=5, scales="free") +
          theme_minimal() +
          labs(x="Urbanization",y="Metabolic Capacity Index") 

10.8.3.2 Grey squirrel

community_elem_urb_grey <- pred_urban_grey  %>%
  filter(genome %in% pred_mags.grey) %>% #keep only predictive mags
  group_by(index500, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "index500") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(GIFTs_elements,.,GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="index500")
   })


calculate_slope <- function(x) {
  lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
  coef(lm_fit)[2]
}

element_pred_urb_grey <- map_dfc(community_elem_urb_grey, function(mat) {
      mat %>%
        column_to_rownames(var = "index500") %>%
        t() %>%
        as.data.frame() %>%
        rowwise() %>%
        mutate(slope = calculate_slope(c_across(everything()))) %>%
        select(slope) }) %>%
      t() %>%
      as.data.frame() %>%
      set_names(colnames(community_elem_urb_grey[[1]])[-1]) %>%
      rownames_to_column(var="iteration") %>%
      pivot_longer(!iteration, names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)

# Positively associated with urbanisation
element_pred_urb_grey %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) %>%
  paged_table()

#Negatively associated with urbanisation
element_pred_urb_grey %>%
  filter(mean <0) %>%
  arrange(-negative_support) %>%
  filter(negative_support>=0.9) %>%
  paged_table()
#Positively associated
positive_grey <- element_pred_urb_grey %>%
  filter(mean >0) %>%
  arrange(mean) %>%
  filter(positive_support>=0.9) %>%
  select(-negative_support) %>%
  rename(support=positive_support)

#Negatively associated
negative_grey <- element_pred_urb_grey %>%
  filter(mean <0) %>%
  arrange(mean) %>%
  filter(negative_support>=0.9) %>%
  select(-positive_support) %>%
  rename(support=negative_support)


all_elements_grey <- bind_rows(positive_grey,negative_grey) %>%
  left_join(GIFT_db,by=join_by(trait==Code_element)) %>%
  mutate(trait=factor(trait,levels=c(rev(positive_grey$trait),rev(negative_grey$trait)))) %>%
  mutate(Code_function=factor(Code_function)) %>%
  mutate(element_legend=str_c(trait," - ",Element)) %>%
  mutate(function_legend=str_c(Code_function," - ",Function)) %>%
  select(trait,mean,p1,p9,element_legend,function_legend) %>% 
  unique()

all_elements_grey %>%
  ggplot(aes(x=mean, y=fct_reorder(element_legend, mean), xmin=p1, xmax=p9, color=function_legend)) +
      geom_point() +
      geom_errorbar() +
      #xlim(c(-0.050,0.050)) +
      geom_vline(xintercept=0) +
      scale_color_manual(values = gift_colors$Color) +
      theme_minimal() +
      labs(x="Regression coefficient",y="Functional trait") +
      #guides(col = guide_legend(ncol = 1))
      theme(legend.position = "right")

community_elem_urb_grey %>%
    bind_rows() %>%
    pivot_longer(-index500, names_to = "trait", values_to = "value") %>%
    filter(trait %in% c(positive_grey$trait, negative_grey$trait)) %>%
    mutate(trait=factor(trait, levels=c(positive_grey$trait, negative_grey$trait))) %>%
    mutate(index500=as.numeric(index500)) %>%
    ggplot(aes(x=index500, y=value)) +
          geom_smooth(method = lm, formula = y ~ x, se = TRUE) +
          #geom_smooth(method = lm, formula = y ~ splines::bs(x, 3), se = TRUE) +
          facet_wrap(~trait, ncol=5, scales="free") +
          theme_minimal() +
          labs(x="Urbanization",y="Metabolic Capacity Index") 

10.8.4 Element-level predictions by season

10.8.4.1 Red squirrel

community_elem_seas_red <- pred_season_red  %>%
  filter(genome %in% pred_mags.red) %>% #keep only predictive mags
  group_by(season, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "season") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(GIFTs_elements,.,GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="season")
   })

element_pred_seas_red <- map_dfr(community_elem_seas_red, function(df) {
  spring_values <- df %>% filter(season == "spring-summer") %>% select(-season)
  winter_values <- df %>% filter(season == "winter") %>% select(-season)
  spring_values - winter_values
}) %>%
  mutate(iteration=c(1:1000)) %>% 
  pivot_longer(!iteration,names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)

# Spring-summer associated
element_pred_seas_red %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) %>%
  paged_table()

# Winter associated
element_pred_seas_red %>%
  filter(mean <0) %>%
  arrange(-negative_support) %>%
  filter(negative_support>=0.9) %>%
  paged_table()
# Spring-summer associated
positive_red <- element_pred_seas_red%>%
  filter(mean >0) %>%
  arrange(mean) %>%
  filter(positive_support>=0.9) %>%
  select(-negative_support) %>%
  rename(support=positive_support)

# Winter associated
negative_red <- element_pred_seas_red %>%
  filter(mean <0) %>%
  arrange(mean) %>%
  filter(negative_support>=0.9) %>%
  select(-positive_support) %>%
  rename(support=negative_support)

all_elements_red <- bind_rows(positive_red,negative_red) %>%
  left_join(GIFT_db,by=join_by(trait==Code_element)) %>%
  mutate(trait=factor(trait,levels=c(rev(positive_red$trait),rev(negative_red$trait)))) %>%
  mutate(Code_function=factor(Code_function)) %>%
  mutate(element_legend=str_c(trait," - ",Element)) %>%
  mutate(function_legend=str_c(Code_function," - ",Function)) %>%
  select(trait,mean,p1,p9,element_legend,function_legend) %>% 
  unique()

all_elements_red %>%
  ggplot(aes(x=mean, y=fct_reorder(element_legend, mean), xmin=p1, xmax=p9, color=function_legend)) +
      geom_point() +
      geom_errorbar() +
      #xlim(c(-0.050,0.050)) +
      geom_vline(xintercept=0) +
      scale_color_manual(values = gift_colors$Color) +
      theme_minimal() +
      labs(x="Difference in log-abundance",y="Functional trait") +
      annotate('text', x=-2, y=20, label = "Enriched in\nwinter", color='black') +
      annotate('text', x=2, y=20, label = "Enriched in\nspring-summer", color='black') +
      #guides(col = guide_legend(ncol = 1))
      theme(legend.position = "none")

10.8.4.2 Grey squirrel

community_elem_seas_grey <- pred_season_grey  %>%
  filter(genome %in% pred_mags.grey) %>% #keep only predictive mags
  group_by(season, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "season") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(GIFTs_elements,.,GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="season")
   })

element_pred_seas_grey <- map_dfr(community_elem_seas_grey, function(df) {
  spring_values <- df %>% filter(season == "spring-summer") %>% select(-season)
  winter_values <- df %>% filter(season == "winter") %>% select(-season)
  spring_values - winter_values
}) %>%
  mutate(iteration=c(1:1000)) %>% 
  pivot_longer(!iteration,names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)

# Spring-summer associated
element_pred_seas_grey %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) %>%
  paged_table()

# Winter associated
element_pred_seas_grey %>%
  filter(mean <0) %>%
  arrange(-negative_support) %>%
  filter(negative_support>=0.9) %>%
  paged_table()
#Spring-summer associated
positive_grey <- element_pred_seas_grey%>%
  filter(mean >0) %>%
  arrange(mean) %>%
  filter(positive_support>=0.9) %>%
  select(-negative_support) %>%
  rename(support=positive_support)

#Winter associated
negative_grey <- element_pred_seas_grey %>%
  filter(mean <0) %>%
  arrange(mean) %>%
  filter(negative_support>=0.9) %>%
  select(-positive_support) %>%
  rename(support=negative_support)

all_elements_grey <- bind_rows(positive_grey,negative_grey) %>%
  left_join(GIFT_db,by=join_by(trait==Code_element)) %>%
  mutate(trait=factor(trait,levels=c(rev(positive_grey$trait),rev(negative_grey$trait)))) %>%
  mutate(Code_function=factor(Code_function)) %>%
  mutate(element_legend=str_c(trait," - ",Element)) %>%
  mutate(function_legend=str_c(Code_function," - ",Function)) %>%
  select(trait,mean,p1,p9,element_legend,function_legend) %>% 
  unique()

all_elements_grey %>%
  ggplot(aes(x=mean, y=fct_reorder(element_legend, mean), xmin=p1, xmax=p9, color=function_legend)) +
      geom_point() +
      geom_errorbar() +
      #xlim(c(-0.050,0.050)) +
      geom_vline(xintercept=0) +
      scale_color_manual(values = gift_colors$Color) +
      theme_minimal() +
      labs(x="Difference in log-abundance",y="Functional trait") +
      annotate('text', x=-2, y=17, label = "Enriched in\nwinter", color='black') +
      annotate('text', x=2, y=17, label = "Enriched in\nspring-summer", color='black') +
      #guides(col = guide_legend(ncol = 1))
      theme(legend.position = "none")